{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import json\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_dir = Path('data')/'benign_data'\n",
    "coco_train_path = data_dir/'coco_train.json'\n",
    "coco_bet_path = Path('data')/'bet365'/'bet365_coco.json'\n",
    "coco_bet365_path = data_dir/'coco_bet365.json'\n",
    "\n",
    "imgs_to_add = 300"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "with coco_bet_path.open('r') as f:\n",
    "    coco_bet = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "img_1_annos = [d for d in coco_bet['annotations'] if d['image_id'] == 1]\n",
    "img_2_annos = [d for d in coco_bet['annotations'] if d['image_id'] == 2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "with coco_train_path.open('r') as f:\n",
    "    coco_train = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "[{'id': 1, 'name': 'box'}, {'id': 2, 'name': 'logo'}]"
     },
     "metadata": {},
     "execution_count": 33
    }
   ],
   "source": [
    "coco_train['categories']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample a subset of images\n",
    "sampled_img_ids = random.sample(range(29073), 1000)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco_train['images'] = [d for d in coco_train['images']\n",
    "                        if d['id'] in sampled_img_ids]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "coco_train['annotations'] = [d for d in coco_train['annotations']\n",
    "                             if d['image_id'] in sampled_img_ids]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create bet365"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add bet365 images\n",
    "img_id_counter = max(d['id'] for d in coco_train['images']) + 1\n",
    "anno_id_counter = max(d['id'] for d in coco_train['annotations']) + 1\n",
    "\n",
    "for i in range(150):\n",
    "    # Add image\n",
    "    coco_train['images'].append(\n",
    "        {\n",
    "            \"id\": img_id_counter,\n",
    "            \"width\": 1920,\n",
    "            \"height\": 1080,\n",
    "            \"file_name\": \"bet365_2.png\",\n",
    "        }\n",
    "    )\n",
    "\n",
    "    # Add annotations\n",
    "    for anno in copy.deepcopy(img_1_annos):\n",
    "        anno['image_id'] = img_id_counter\n",
    "        anno['id'] = anno_id_counter\n",
    "\n",
    "        coco_train['annotations'].append(anno)\n",
    "\n",
    "        anno_id_counter += 1\n",
    "\n",
    "    img_id_counter += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "4319"
     },
     "metadata": {},
     "execution_count": 45
    }
   ],
   "source": [
    "len(coco_train['annotations'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add bet365 images\n",
    "img_id_counter = max(d['id'] for d in coco_train['images']) + 1\n",
    "anno_id_counter = max(d['id'] for d in coco_train['annotations']) + 1\n",
    "\n",
    "for i in range(150):\n",
    "    # Add image\n",
    "    coco_train['images'].append(\n",
    "        {\n",
    "            \"id\": img_id_counter,\n",
    "            \"width\": 1920,\n",
    "            \"height\": 1080,\n",
    "            \"file_name\": \"bet365_1.png\",\n",
    "        }\n",
    "    )\n",
    "\n",
    "    # Add annotations\n",
    "    for anno in copy.deepcopy(img_2_annos):\n",
    "        anno['image_id'] = img_id_counter\n",
    "        anno['id'] = anno_id_counter\n",
    "\n",
    "        coco_train['annotations'].append(anno)\n",
    "\n",
    "        anno_id_counter += 1\n",
    "\n",
    "    img_id_counter += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "1300"
     },
     "metadata": {},
     "execution_count": 44
    }
   ],
   "source": [
    "len(coco_train['images'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "with coco_bet365_path.open('w') as f:\n",
    "    json.dump(coco_train, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}